source("../shared/functions.R")
here::i_am("rmd3/a1_preprocess.rmd")
## here() starts at /Users/wang.13246/Documents/Project/Huazhu_scRNAseq
print(paste("Current working directory:", here::here()))
## [1] "Current working directory: /Users/wang.13246/Documents/Project/Huazhu_scRNAseq"
Load all 10X data from ../data_19072-23-26
directory.
data_dir <- "../data_19072-23-26"
sample_dirs <- list.files(data_dir)
data_dirs <- file.path(data_dir, sample_dirs, "filtered_feature_bc_matrix")
data_list <- list()
cat(paste("Loading from:", data_dir, "\n"))
## Loading from: ../data_19072-23-26
cat(paste("Found", length(data_dirs), "samples to load.\n\n"))
## Found 8 samples to load.
for (i in seq_along(data_dirs)){
current_sample_name <- sample_dirs[i]
cat(paste("Loading sample:", current_sample_name, "\n"))
tmp <- Read10X(
data_dirs[i],
unique.features = TRUE,
strip.suffix = FALSE
)
data_list[[i]] <- CreateSeuratObject(
tmp,
assay = "RNA",
min.cells = 3,
min.features = 200,
project = current_sample_name
)
# Add QC metrics
# Ribosomal genes
rb.genes <- rownames(data_list[[i]])[grep("^Rp[sl][[:digit:]]", rownames(data_list[[i]]))]
if (length(rb.genes) > 0) {
percent.ribo <- colSums(GetAssayData(data_list[[i]], slot="counts")[rb.genes, ]) /
Matrix::colSums(GetAssayData(data_list[[i]], slot="counts")) * 100
data_list[[i]] <- AddMetaData(data_list[[i]], percent.ribo, col.name = "percent.ribo")
} else {
data_list[[i]]$percent.ribo <- 0
}
# Mitochondrial genes
data_list[[i]] <- PercentageFeatureSet(data_list[[i]], "^mt-", col.name = "percent.mito")
# Hemoglobin genes (potential contamination)
data_list[[i]] <- PercentageFeatureSet(data_list[[i]], "^Hb[^(p)]", col.name = "percent.hb")
# Add sample name as metadata
data_list[[i]]@meta.data$sample <- current_sample_name
}
## Loading sample: 19072-23-01-01-01
## Warning: The `slot` argument of `GetAssayData()` is deprecated as of SeuratObject 5.0.0.
## ℹ Please use the `layer` argument instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Loading sample: 19072-23-02-01-01
## Loading sample: 19072-23-03-01-01
## Loading sample: 19072-23-04-01-01
## Loading sample: 19072-26-01-01-01
## Loading sample: 19072-26-02-01-01
## Loading sample: 19072-26-03-01-01
## Loading sample: 19072-26-04-01-01
# Get all loaded sample names
all_sample_names <- sapply(data_list, function(x) unique(x$sample))
names(data_list) <- all_sample_names
cat("\nAll loaded sample names:\n")
##
## All loaded sample names:
print(all_sample_names)
## [1] "19072-23-01-01-01" "19072-23-02-01-01" "19072-23-03-01-01"
## [4] "19072-23-04-01-01" "19072-26-01-01-01" "19072-26-02-01-01"
## [7] "19072-26-03-01-01" "19072-26-04-01-01"
cat(paste("\nTotal samples loaded:", length(data_list), "\n"))
##
## Total samples loaded: 8
rm(tmp, rb.genes, percent.ribo, i, current_sample_name, data_dirs, sample_dirs)
gc()
## used (Mb) gc trigger (Mb) limit (Mb) max used (Mb)
## Ncells 8686806 464.0 14174291 757.0 NA 14174291 757.0
## Vcells 441907191 3371.5 1038783050 7925.3 102400 1033656499 7886.2
This function uses scuttle::isOutlier for automated,
data-driven QC thresholds.
automated_qc_filtering <- function(seurat_obj, sample_name, nmads = 3) {
cat(paste("\n Processing", sample_name, "with nmads =", nmads, "...\n"))
df <- seurat_obj@meta.data
# Identify outliers for each metric
# Library size (nCount_RNA) - both directions, log scale
outlier_libsize <- isOutlier(df$nCount_RNA, type = "both", log = TRUE, nmads = nmads)
# Number of features (nFeature_RNA) - both directions, log scale
outlier_features <- isOutlier(df$nFeature_RNA, type = "both", log = TRUE, nmads = nmads)
# Mitochondrial percentage - higher direction only
outlier_mito <- isOutlier(df$percent.mito, type = "higher", nmads = nmads)
# Combine all outlier criteria
outlier_combined <- outlier_libsize | outlier_features | outlier_mito
# Extract thresholds
thresh_libsize <- attr(outlier_libsize, "thresholds")
thresh_features <- attr(outlier_features, "thresholds")
thresh_mito <- attr(outlier_mito, "thresholds")
cat(" Adaptive thresholds calculated:\n")
cat(" nCount_RNA: [", round(thresh_libsize["lower"]), " - ",
round(thresh_libsize["higher"]), "]\n", sep = "")
cat(" nFeature_RNA: [", round(thresh_features["lower"]), " - ",
round(thresh_features["higher"]), "]\n", sep = "")
cat(" percent.mito: < ", round(thresh_mito["higher"], 2), "\n", sep = "")
cat("\n Outliers identified:\n")
cat(" Library size (low/high UMI):", sum(outlier_libsize),
"(", round(sum(outlier_libsize)/nrow(df)*100, 1), "%)\n")
cat(" Feature count (low/high genes):", sum(outlier_features),
"(", round(sum(outlier_features)/nrow(df)*100, 1), "%)\n")
cat(" High mitochondrial:", sum(outlier_mito),
"(", round(sum(outlier_mito)/nrow(df)*100, 1), "%)\n")
cat(" TOTAL outliers:", sum(outlier_combined),
"(", round(sum(outlier_combined)/nrow(df)*100, 1), "%)\n")
cat(" Cells to KEEP:", sum(!outlier_combined),
"(", round(sum(!outlier_combined)/nrow(df)*100, 1), "%)\n")
# Add outlier flags to metadata
seurat_obj$outlier_libsize <- outlier_libsize
seurat_obj$outlier_features <- outlier_features
seurat_obj$outlier_mito <- outlier_mito
seurat_obj$outlier_combined <- outlier_combined
# Store thresholds
seurat_obj@misc$qc_thresholds <- list(
libsize_lower = thresh_libsize["lower"],
libsize_higher = thresh_libsize["higher"],
features_lower = thresh_features["lower"],
features_higher = thresh_features["higher"],
mito_higher = thresh_mito["higher"]
)
return(seurat_obj)
}
process_project <- function(seurat_list, project_name, sample_metadata_df, output_file,
nmads = 3,
run_harmony = TRUE,
harmony_group_by = "sample") {
cat(paste("\n========================================\n"))
cat(paste("Processing Project:", project_name, "\n"))
cat(paste("========================================\n"))
# 1. Apply automated QC to each sample
cat("\n--- Step 1: Automated QC with scuttle::isOutlier ---\n")
for (i in seq_along(seurat_list)) {
sample_name <- unique(seurat_list[[i]]$sample)
seurat_list[[i]] <- automated_qc_filtering(seurat_list[[i]], sample_name, nmads = nmads)
}
# 2. Pre-filtering summary
cat("\n--- Step 2: Pre-filtering QC Summary ---\n")
pre_filter_counts <- sapply(seurat_list, ncol)
names(pre_filter_counts) <- sapply(seurat_list, function(x) unique(x$sample))
print(data.frame(Sample = names(pre_filter_counts), Cells_Before = pre_filter_counts))
# 3. Filter outliers
cat("\n--- Step 3: Filtering Outlier Cells ---\n")
for (i in seq_along(seurat_list)) {
sample_name <- unique(seurat_list[[i]]$sample)
n_before <- ncol(seurat_list[[i]])
seurat_list[[i]] <- subset(seurat_list[[i]], subset = outlier_combined == FALSE)
n_after <- ncol(seurat_list[[i]])
cat(paste(" ", sample_name, ":", n_before, "->", n_after, "cells\n"))
}
# 4. Merge objects
cat("\n--- Step 4: Merging Samples ---\n")
if (length(seurat_list) > 1) {
sample_names_for_merge <- sapply(seurat_list, function(x) unique(x$sample))
combined <- merge(x = seurat_list[[1]],
y = seurat_list[2:length(seurat_list)],
add.cell.ids = sample_names_for_merge)
} else {
combined <- seurat_list[[1]]
}
# Fix sample column from orig.ident
if ("orig.ident" %in% colnames(combined@meta.data)) {
combined$sample <- combined$orig.ident
}
cat(paste("Total cells after filtering:", ncol(combined), "\n"))
# 5. Add project metadata
cat("\n--- Step 5: Adding Project Metadata ---\n")
original_meta <- combined@meta.data
original_meta$orig.ident_join_key <- original_meta$orig.ident
sample_metadata_df$orig.ident_join_key <- sample_metadata_df$sample
new_meta <- dplyr::left_join(original_meta,
sample_metadata_df[, !colnames(sample_metadata_df) == "sample"],
by = "orig.ident_join_key")
rownames(new_meta) <- rownames(original_meta)
combined@meta.data <- new_meta
new_cols <- colnames(sample_metadata_df)[!colnames(sample_metadata_df) %in% c("sample", "orig.ident_join_key")]
for(col in new_cols) {
cat(paste("--- Table for:", col, "---\n"))
print(table(combined@meta.data[, col], useNA = "ifany"))
}
# 6. Post-filtering summary
cat("\n--- Step 6: Post-filtering Statistics ---\n")
qc_summary <- combined@meta.data %>%
group_by(sample) %>%
summarise(
n_cells = n(),
median_nFeature = median(nFeature_RNA),
median_nCount = median(nCount_RNA),
median_mito = round(median(percent.mito), 2),
.groups = "drop"
)
print(qc_summary)
# 7. QC Violin plots
cat("\n--- Step 7: Generating QC Plots ---\n")
p_qc <- VlnPlot(combined, features = c("nFeature_RNA", "nCount_RNA", "percent.mito"),
group.by = "sample", pt.size = 0, ncol = 3) +
plot_annotation(title = paste(project_name, "- QC After Filtering"))
print(p_qc)
# 8. Standard Seurat workflow
cat("\n--- Step 8: Running Seurat Analysis Pipeline ---\n")
cat(" Normalizing...\n")
combined <- NormalizeData(combined, normalization.method = "LogNormalize",
scale.factor = 10000, verbose = FALSE)
cat(" Finding variable features...\n")
combined <- FindVariableFeatures(combined, selection.method = "vst",
nfeatures = 3000, verbose = FALSE)
cat(" Scaling data (regressing out percent.mito)...\n")
combined <- ScaleData(combined, vars.to.regress = "percent.mito", verbose = FALSE)
cat(" Running PCA...\n")
combined <- RunPCA(combined, npcs = 50, verbose = FALSE)
# Elbow plot
p_elbow <- ElbowPlot(combined, ndims = 50) + ggtitle(paste(project_name, "- PCA Elbow Plot"))
print(p_elbow)
reduction_to_use <- "pca"
# 9. Harmony batch correction
if (run_harmony && length(unique(combined@meta.data[[harmony_group_by]])) > 1) {
cat("\n--- Step 9: Running Harmony Batch Correction ---\n")
combined <- harmony::RunHarmony(
combined,
group.by.vars = harmony_group_by,
assay.use = "RNA",
verbose = FALSE
)
reduction_to_use <- "harmony"
cat(" Using Harmony reduction\n")
} else {
cat("\n--- Step 9: Skipping Harmony (single sample or disabled) ---\n")
}
# 10. UMAP and clustering
cat("\n--- Step 10: Running UMAP and Clustering ---\n")
n_dims <- 30
combined <- RunUMAP(combined, reduction = reduction_to_use, dims = 1:n_dims, verbose = FALSE)
combined <- FindNeighbors(combined, reduction = reduction_to_use, dims = 1:n_dims, verbose = FALSE)
# Test multiple resolutions
resolutions <- c(0.1, 0.2, 0.3, 0.5, 0.8, 1.0)
for (res in resolutions) {
combined <- FindClusters(combined, resolution = res, verbose = FALSE)
}
cat(" Cluster counts:\n")
for (res in resolutions) {
col_name <- paste0("RNA_snn_res.", res)
n_clusters <- length(unique(combined@meta.data[[col_name]]))
cat(" Resolution", res, ":", n_clusters, "clusters\n")
}
Idents(combined) <- "RNA_snn_res.0.3"
# 11. Visualizations
cat("\n--- Step 11: Generating UMAP Plots ---\n")
p_cluster <- DimPlot(combined, group.by = "RNA_snn_res.0.3", label = TRUE, label.size = 5) +
ggtitle(paste(project_name, "- Clusters (Resolution 0.3)"))
p_sample <- DimPlot(combined, group.by = "sample", label = FALSE, pt.size = 0.3) +
ggtitle(paste(project_name, "- Sample Distribution"))
print(p_cluster | p_sample)
# Split by sample
p_split <- DimPlot(combined, split.by = "sample", label = TRUE, ncol = 2) +
ggtitle(paste(project_name, "- Clusters Split by Sample"))
print(p_split)
# QC on UMAP
p_qc_umap <- FeaturePlot(combined, features = c("nFeature_RNA", "nCount_RNA",
"percent.mito", "percent.ribo"), ncol = 2)
print(p_qc_umap)
# Plot by metadata groups
for (group_name in new_cols) {
p_group <- DimPlot(combined, group.by = group_name, label = TRUE, pt.size = 0.3,
repel = TRUE, label.box = TRUE) +
ggtitle(paste(project_name, "-", group_name))
print(p_group)
}
# 12. Check marker genes
cat("\n--- Step 12: Checking Marker Genes ---\n")
markers <- c(
"Tnnt2", "Myh6", "Myh7", # Cardiomyocytes
"Col1a1", "Col3a1", "Dcn", # Fibroblasts
"Pecam1", "Cdh5", "Vwf", # Endothelial
"Ptprc", "Cd68", # Immune
"Acta2", "Myh11" # Smooth muscle
)
markers_present <- markers[markers %in% rownames(combined)]
cat(" Markers found:", paste(markers_present, collapse = ", "), "\n")
if (length(markers_present) > 0) {
p_markers <- FeaturePlot(combined, features = markers_present[1:min(9, length(markers_present))],
ncol = 3, order = TRUE)
print(p_markers)
p_dot <- DotPlot(combined, features = markers_present, group.by = "RNA_snn_res.0.3") +
RotatedAxis() +
ggtitle(paste(project_name, "- Marker Expression by Cluster"))
print(p_dot)
}
# 13. Save
cat(paste("\n--- Step 13: Saving processed object to:", output_file, "---\n"))
qs::qsave(combined, output_file)
cat(paste("\n========================================\n"))
cat(paste("Finished Project:", project_name, "\n"))
cat(paste("Total cells:", ncol(combined), "\n"))
cat(paste("========================================\n\n"))
return(combined)
}
Samples: - 19072-23-01-01-01 -
19072-23-02-01-01 - 19072-23-03-01-01 -
19072-23-04-01-01
proj23_sample_names <- c("19072-23-01-01-01", "19072-23-02-01-01",
"19072-23-03-01-01", "19072-23-04-01-01")
proj23_indices <- which(all_sample_names %in% proj23_sample_names)
if (length(proj23_indices) > 0) {
proj23_list <- data_list[proj23_indices]
# Create metadata - UPDATE group COLUMN WITH ACTUAL EXPERIMENTAL GROUPS
proj23_meta <- data.frame(
sample = proj23_sample_names,
group = c("Sample_01", "Sample_02", "Sample_03", "Sample_04") # TODO: Update with real group labels
)
proj23_obj <- process_project(
seurat_list = proj23_list,
project_name = "Project_19072_23",
sample_metadata_df = proj23_meta,
output_file = "proj23_harmony_processed.qsave",
nmads = 3, # Adjust: 2.5 = stricter, 5 = more lenient
run_harmony = TRUE,
harmony_group_by = "sample"
)
} else {
cat("No samples found for Project 19072-23.\n")
cat("Looking for:", proj23_sample_names, "\n")
cat("Available samples:", all_sample_names, "\n")
}
##
## ========================================
## Processing Project: Project_19072_23
## ========================================
##
## --- Step 1: Automated QC with scuttle::isOutlier ---
##
## Processing 19072-23-01-01-01 with nmads = 3 ...
## Adaptive thresholds calculated:
## nCount_RNA: [217 - 2522402]
## nFeature_RNA: [566 - 32886]
## percent.mito: < 2.07
##
## Outliers identified:
## Library size (low/high UMI): 0 ( 0 %)
## Feature count (low/high genes): 368 ( 5.5 %)
## High mitochondrial: 431 ( 6.5 %)
## TOTAL outliers: 629 ( 9.5 %)
## Cells to KEEP: 6021 ( 90.5 %)
##
## Processing 19072-23-02-01-01 with nmads = 3 ...
## Adaptive thresholds calculated:
## nCount_RNA: [30 - 5213341]
## nFeature_RNA: [154 - 74539]
## percent.mito: < 2.29
##
## Outliers identified:
## Library size (low/high UMI): 0 ( 0 %)
## Feature count (low/high genes): 0 ( 0 %)
## High mitochondrial: 469 ( 8.3 %)
## TOTAL outliers: 469 ( 8.3 %)
## Cells to KEEP: 5191 ( 91.7 %)
##
## Processing 19072-23-03-01-01 with nmads = 3 ...
## Adaptive thresholds calculated:
## nCount_RNA: [2046 - 254699]
## nFeature_RNA: [1395 - 12072]
## percent.mito: < 1.79
##
## Outliers identified:
## Library size (low/high UMI): 1257 ( 16.9 %)
## Feature count (low/high genes): 1348 ( 18.1 %)
## High mitochondrial: 454 ( 6.1 %)
## TOTAL outliers: 1536 ( 20.7 %)
## Cells to KEEP: 5896 ( 79.3 %)
##
## Processing 19072-23-04-01-01 with nmads = 3 ...
## Adaptive thresholds calculated:
## nCount_RNA: [3351 - 153402]
## nFeature_RNA: [1785 - 9070]
## percent.mito: < 1.7
##
## Outliers identified:
## Library size (low/high UMI): 466 ( 5.8 %)
## Feature count (low/high genes): 598 ( 7.4 %)
## High mitochondrial: 440 ( 5.5 %)
## TOTAL outliers: 756 ( 9.4 %)
## Cells to KEEP: 7297 ( 90.6 %)
##
## --- Step 2: Pre-filtering QC Summary ---
## Sample Cells_Before
## 19072-23-01-01-01 19072-23-01-01-01 6650
## 19072-23-02-01-01 19072-23-02-01-01 5660
## 19072-23-03-01-01 19072-23-03-01-01 7432
## 19072-23-04-01-01 19072-23-04-01-01 8053
##
## --- Step 3: Filtering Outlier Cells ---
## 19072-23-01-01-01 : 6650 -> 6021 cells
## 19072-23-02-01-01 : 5660 -> 5191 cells
## 19072-23-03-01-01 : 7432 -> 5896 cells
## 19072-23-04-01-01 : 8053 -> 7297 cells
##
## --- Step 4: Merging Samples ---
## Total cells after filtering: 24405
##
## --- Step 5: Adding Project Metadata ---
## --- Table for: group ---
##
## Sample_01 Sample_02 Sample_03 Sample_04
## 6021 5191 5896 7297
##
## --- Step 6: Post-filtering Statistics ---
## # A tibble: 4 × 5
## sample n_cells median_nFeature median_nCount median_mito
## <chr> <int> <dbl> <dbl> <dbl>
## 1 19072-23-01-01-01 6021 4671 27762 0.81
## 2 19072-23-02-01-01 5191 3954 16982 0.99
## 3 19072-23-03-01-01 5896 4356 26744. 0.82
## 4 19072-23-04-01-01 7297 4126 24426 0.8
##
## --- Step 7: Generating QC Plots ---
## Warning: Default search for "data" layer in "RNA" assay yielded no results;
## utilizing "counts" layer instead.
##
## --- Step 8: Running Seurat Analysis Pipeline ---
## Normalizing...
## Finding variable features...
## Scaling data (regressing out percent.mito)...
## Running PCA...
##
## --- Step 9: Running Harmony Batch Correction ---
## Using Harmony reduction
##
## --- Step 10: Running UMAP and Clustering ---
## Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
## To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
## This message will be shown once per session
## Cluster counts:
## Resolution 0.1 : 10 clusters
## Resolution 0.2 : 13 clusters
## Resolution 0.3 : 16 clusters
## Resolution 0.5 : 17 clusters
## Resolution 0.8 : 22 clusters
## Resolution 1 : 25 clusters
##
## --- Step 11: Generating UMAP Plots ---
##
## --- Step 12: Checking Marker Genes ---
## Markers found: Tnnt2, Myh6, Myh7, Col1a1, Col3a1, Dcn, Pecam1, Cdh5, Vwf, Ptprc, Cd68, Acta2, Myh11
##
## --- Step 13: Saving processed object to: proj23_harmony_processed.qsave ---
##
## ========================================
## Finished Project: Project_19072_23
## Total cells: 24405
## ========================================
Samples: - 19072-26-01-01-01 -
19072-26-02-01-01 - 19072-26-03-01-01 -
19072-26-04-01-01
proj26_sample_names <- c("19072-26-01-01-01", "19072-26-02-01-01",
"19072-26-03-01-01", "19072-26-04-01-01")
proj26_indices <- which(all_sample_names %in% proj26_sample_names)
if (length(proj26_indices) > 0) {
proj26_list <- data_list[proj26_indices]
# Create metadata - UPDATE group COLUMN WITH ACTUAL EXPERIMENTAL GROUPS
proj26_meta <- data.frame(
sample = proj26_sample_names,
group = c("Sample_01", "Sample_02", "Sample_03", "Sample_04") # TODO: Update with real group labels
)
proj26_obj <- process_project(
seurat_list = proj26_list,
project_name = "Project_19072_26",
sample_metadata_df = proj26_meta,
output_file = "proj26_harmony_processed.qsave",
nmads = 3, # Adjust: 2.5 = stricter, 5 = more lenient
run_harmony = TRUE,
harmony_group_by = "sample"
)
} else {
cat("No samples found for Project 19072-26.\n")
cat("Looking for:", proj26_sample_names, "\n")
cat("Available samples:", all_sample_names, "\n")
}
##
## ========================================
## Processing Project: Project_19072_26
## ========================================
##
## --- Step 1: Automated QC with scuttle::isOutlier ---
##
## Processing 19072-26-01-01-01 with nmads = 3 ...
## Adaptive thresholds calculated:
## nCount_RNA: [215 - 2860]
## nFeature_RNA: [138 - 1566]
## percent.mito: < 9.44
##
## Outliers identified:
## Library size (low/high UMI): 4096 ( 4.5 %)
## Feature count (low/high genes): 4017 ( 4.4 %)
## High mitochondrial: 176 ( 0.2 %)
## TOTAL outliers: 4517 ( 5 %)
## Cells to KEEP: 86722 ( 95 %)
##
## Processing 19072-26-02-01-01 with nmads = 3 ...
## Adaptive thresholds calculated:
## nCount_RNA: [253 - 4504]
## nFeature_RNA: [165 - 2196]
## percent.mito: < 10.58
##
## Outliers identified:
## Library size (low/high UMI): 2074 ( 3.5 %)
## Feature count (low/high genes): 1727 ( 2.9 %)
## High mitochondrial: 123 ( 0.2 %)
## TOTAL outliers: 2235 ( 3.8 %)
## Cells to KEEP: 56642 ( 96.2 %)
##
## Processing 19072-26-03-01-01 with nmads = 3 ...
## Adaptive thresholds calculated:
## nCount_RNA: [182 - 14404]
## nFeature_RNA: [112 - 554]
## percent.mito: < 22.29
##
## Outliers identified:
## Library size (low/high UMI): 3 ( 0.2 %)
## Feature count (low/high genes): 134 ( 10.5 %)
## High mitochondrial: 106 ( 8.3 %)
## TOTAL outliers: 240 ( 18.8 %)
## Cells to KEEP: 1039 ( 81.2 %)
##
## Processing 19072-26-04-01-01 with nmads = 3 ...
## Adaptive thresholds calculated:
## nCount_RNA: [305 - 3992]
## nFeature_RNA: [200 - 2023]
## percent.mito: < 10.58
##
## Outliers identified:
## Library size (low/high UMI): 4584 ( 5.1 %)
## Feature count (low/high genes): 4076 ( 4.6 %)
## High mitochondrial: 59 ( 0.1 %)
## TOTAL outliers: 4745 ( 5.3 %)
## Cells to KEEP: 84285 ( 94.7 %)
##
## --- Step 2: Pre-filtering QC Summary ---
## Sample Cells_Before
## 19072-26-01-01-01 19072-26-01-01-01 91239
## 19072-26-02-01-01 19072-26-02-01-01 58877
## 19072-26-03-01-01 19072-26-03-01-01 1279
## 19072-26-04-01-01 19072-26-04-01-01 89030
##
## --- Step 3: Filtering Outlier Cells ---
## 19072-26-01-01-01 : 91239 -> 86722 cells
## 19072-26-02-01-01 : 58877 -> 56642 cells
## 19072-26-03-01-01 : 1279 -> 1039 cells
## 19072-26-04-01-01 : 89030 -> 84285 cells
##
## --- Step 4: Merging Samples ---
## Total cells after filtering: 228688
##
## --- Step 5: Adding Project Metadata ---
## --- Table for: group ---
##
## Sample_01 Sample_02 Sample_03 Sample_04
## 86722 56642 1039 84285
##
## --- Step 6: Post-filtering Statistics ---
## # A tibble: 4 × 5
## sample n_cells median_nFeature median_nCount median_mito
## <chr> <int> <dbl> <dbl> <dbl>
## 1 19072-26-01-01-01 86722 453 764 5.2
## 2 19072-26-02-01-01 56642 589 1042 6.05
## 3 19072-26-03-01-01 1039 242 1431 3.83
## 4 19072-26-04-01-01 84285 619 1071 5.61
##
## --- Step 7: Generating QC Plots ---
## Warning: Default search for "data" layer in "RNA" assay yielded no results;
## utilizing "counts" layer instead.
## Rasterizing points since number of points exceeds 100,000.
## To disable this behavior set `raster=FALSE`
## Rasterizing points since number of points exceeds 100,000.
## To disable this behavior set `raster=FALSE`
## Rasterizing points since number of points exceeds 100,000.
## To disable this behavior set `raster=FALSE`
##
## --- Step 8: Running Seurat Analysis Pipeline ---
## Normalizing...
## Finding variable features...
## Scaling data (regressing out percent.mito)...
## Running PCA...
##
## --- Step 9: Running Harmony Batch Correction ---
## Warning: Quick-TRANSfer stage steps exceeded maximum (= 11434400)
## Warning: Quick-TRANSfer stage steps exceeded maximum (= 11434400)
## Warning: Quick-TRANSfer stage steps exceeded maximum (= 11434400)
## Warning: Quick-TRANSfer stage steps exceeded maximum (= 11434400)
## Warning: Quick-TRANSfer stage steps exceeded maximum (= 11434400)
## Warning: Quick-TRANSfer stage steps exceeded maximum (= 11434400)
## Warning: Quick-TRANSfer stage steps exceeded maximum (= 11434400)
## Warning: Quick-TRANSfer stage steps exceeded maximum (= 11434400)
## Warning: Quick-TRANSfer stage steps exceeded maximum (= 11434400)
## Warning: Quick-TRANSfer stage steps exceeded maximum (= 11434400)
## Using Harmony reduction
##
## --- Step 10: Running UMAP and Clustering ---
## Cluster counts:
## Resolution 0.1 : 20 clusters
## Resolution 0.2 : 20 clusters
## Resolution 0.3 : 21 clusters
## Resolution 0.5 : 23 clusters
## Resolution 0.8 : 27 clusters
## Resolution 1 : 28 clusters
##
## --- Step 11: Generating UMAP Plots ---
## Rasterizing points since number of points exceeds 100,000.
## To disable this behavior set `raster=FALSE`
## Rasterizing points since number of points exceeds 100,000.
## To disable this behavior set `raster=FALSE`
## Rasterizing points since number of points exceeds 100,000.
## To disable this behavior set `raster=FALSE`
## Rasterizing points since number of points exceeds 100,000.
## To disable this behavior set `raster=FALSE`
## Rasterizing points since number of points exceeds 100,000.
## To disable this behavior set `raster=FALSE`
## Rasterizing points since number of points exceeds 100,000.
## To disable this behavior set `raster=FALSE`
## Rasterizing points since number of points exceeds 100,000.
## To disable this behavior set `raster=FALSE`
## Rasterizing points since number of points exceeds 100,000.
## To disable this behavior set `raster=FALSE`
##
## --- Step 12: Checking Marker Genes ---
## Markers found: Tnnt2, Myh6, Myh7, Col1a1, Col3a1, Dcn, Pecam1, Cdh5, Vwf, Ptprc, Cd68, Acta2, Myh11
## Rasterizing points since number of points exceeds 100,000.
## To disable this behavior set `raster=FALSE`
## Rasterizing points since number of points exceeds 100,000.
## To disable this behavior set `raster=FALSE`
## Rasterizing points since number of points exceeds 100,000.
## To disable this behavior set `raster=FALSE`
## Rasterizing points since number of points exceeds 100,000.
## To disable this behavior set `raster=FALSE`
## Rasterizing points since number of points exceeds 100,000.
## To disable this behavior set `raster=FALSE`
## Rasterizing points since number of points exceeds 100,000.
## To disable this behavior set `raster=FALSE`
## Rasterizing points since number of points exceeds 100,000.
## To disable this behavior set `raster=FALSE`
## Rasterizing points since number of points exceeds 100,000.
## To disable this behavior set `raster=FALSE`
## Rasterizing points since number of points exceeds 100,000.
## To disable this behavior set `raster=FALSE`
##
## --- Step 13: Saving processed object to: proj26_harmony_processed.qsave ---
##
## ========================================
## Finished Project: Project_19072_26
## Total cells: 228688
## ========================================
## R version 4.4.1 (2024-06-14)
## Platform: aarch64-apple-darwin20
## Running under: macOS 15.7.1
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/lib/libRlapack.dylib; LAPACK version 3.12.0
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## time zone: America/New_York
## tzcode source: internal
##
## attached base packages:
## [1] stats4 stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] Matrix_1.7-0 harmony_1.2.0
## [3] Rcpp_1.1.0 Polychrome_1.5.1
## [5] qs_0.26.3 here_1.0.1
## [7] patchwork_1.2.0 ggplot2_3.5.0
## [9] dplyr_1.1.4 cowplot_1.1.3
## [11] Seurat_5.1.0 SeuratObject_5.0.2
## [13] sp_2.1-4 scuttle_1.14.0
## [15] SingleCellExperiment_1.26.0 SummarizedExperiment_1.34.0
## [17] Biobase_2.64.0 GenomicRanges_1.56.1
## [19] GenomeInfoDb_1.40.1 IRanges_2.38.1
## [21] S4Vectors_0.42.1 BiocGenerics_0.50.0
## [23] MatrixGenerics_1.16.0 matrixStats_1.3.0
##
## loaded via a namespace (and not attached):
## [1] RcppAnnoy_0.0.22 splines_4.4.1
## [3] later_1.3.2 tibble_3.2.1
## [5] R.oo_1.26.0 polyclip_1.10-7
## [7] fastDummies_1.7.4 lifecycle_1.0.4
## [9] rprojroot_2.0.4 globals_0.16.3
## [11] lattice_0.22-6 MASS_7.3-60.2
## [13] magrittr_2.0.3 plotly_4.10.4
## [15] sass_0.4.9 rmarkdown_2.28
## [17] jquerylib_0.1.4 yaml_2.3.10
## [19] httpuv_1.6.15 sctransform_0.4.1
## [21] spam_2.10-0 spatstat.sparse_3.1-0
## [23] reticulate_1.42.0 pbapply_1.7-2
## [25] RColorBrewer_1.1-3 abind_1.4-5
## [27] zlibbioc_1.50.0 Rtsne_0.17
## [29] purrr_1.0.2 R.utils_2.12.3
## [31] GenomeInfoDbData_1.2.12 ggrepel_0.9.5
## [33] irlba_2.3.5.1 listenv_0.9.1
## [35] spatstat.utils_3.1-0 goftest_1.2-3
## [37] RSpectra_0.16-2 spatstat.random_3.3-1
## [39] fitdistrplus_1.2-1 parallelly_1.38.0
## [41] DelayedMatrixStats_1.26.0 leiden_0.4.3.1
## [43] codetools_0.2-20 DelayedArray_0.30.1
## [45] RApiSerialize_0.1.3 tidyselect_1.2.1
## [47] UCSC.utils_1.0.0 farver_2.1.2
## [49] spatstat.explore_3.3-1 jsonlite_1.8.8
## [51] progressr_0.14.0 ggridges_0.5.6
## [53] survival_3.6-4 tools_4.4.1
## [55] ica_1.0-3 glue_1.8.0
## [57] gridExtra_2.3 SparseArray_1.4.8
## [59] xfun_0.52 withr_3.0.1
## [61] BiocManager_1.30.26 fastmap_1.2.0
## [63] fansi_1.0.6 digest_0.6.36
## [65] R6_2.5.1 mime_0.12
## [67] colorspace_2.1-1 scattermore_1.2
## [69] tensor_1.5 dichromat_2.0-0.1
## [71] spatstat.data_3.1-2 R.methodsS3_1.8.2
## [73] RhpcBLASctl_0.23-42 utf8_1.2.4
## [75] tidyr_1.3.1 generics_0.1.3
## [77] data.table_1.15.4 httr_1.4.7
## [79] htmlwidgets_1.6.4 S4Arrays_1.4.1
## [81] scatterplot3d_0.3-44 uwot_0.2.2
## [83] pkgconfig_2.0.3 gtable_0.3.6
## [85] lmtest_0.9-40 XVector_0.44.0
## [87] htmltools_0.5.8.1 dotCall64_1.1-1
## [89] scales_1.4.0 png_0.1-8
## [91] spatstat.univar_3.0-0 knitr_1.48
## [93] rstudioapi_0.16.0 reshape2_1.4.4
## [95] nlme_3.1-164 cachem_1.1.0
## [97] zoo_1.8-12 stringr_1.5.1
## [99] KernSmooth_2.23-24 vipor_0.4.7
## [101] parallel_4.4.1 miniUI_0.1.1.1
## [103] ggrastr_1.0.2 pillar_1.9.0
## [105] grid_4.4.1 vctrs_0.6.5
## [107] RANN_2.6.1 promises_1.3.0
## [109] stringfish_0.16.0 beachmat_2.20.0
## [111] xtable_1.8-4 cluster_2.1.6
## [113] beeswarm_0.4.0 evaluate_0.24.0
## [115] cli_3.6.3 compiler_4.4.1
## [117] rlang_1.1.4 crayon_1.5.3
## [119] future.apply_1.11.2 labeling_0.4.3
## [121] ggbeeswarm_0.7.2 plyr_1.8.9
## [123] stringi_1.8.4 viridisLite_0.4.2
## [125] deldir_2.0-4 BiocParallel_1.38.0
## [127] lazyeval_0.2.2 spatstat.geom_3.3-2
## [129] RcppHNSW_0.6.0 sparseMatrixStats_1.16.0
## [131] future_1.34.0 shiny_1.9.1
## [133] highr_0.11 ROCR_1.0-11
## [135] igraph_2.0.3 RcppParallel_5.1.8
## [137] bslib_0.8.0